import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
import java.io.FileInputStream;

/**
 * Loads an XML corpus file and exposes the child nodes of one nested element.
 *
 * <p>The constructor descends two levels below the document root: it takes the
 * first child element of the root (called the "article" in this code), then the
 * first child element of that (called the "paragraph"), and keeps the
 * paragraph's child nodes as the sentence list.
 */
public class CorpusParser {

	/** Child nodes of the selected paragraph element; assigned once in the constructor. */
	private final NodeList sentences;

	/**
	 * Parses {@code corpusFile} as XML and captures the child nodes of the first
	 * paragraph element of the first article element.
	 *
	 * @param corpusFile the XML file to parse
	 * @throws IllegalArgumentException if the root element has no child element, or
	 *         that child has no child element of its own
	 * @throws Throwable any failure raised while opening or parsing the file (for
	 *         example I/O or XML parser errors); the broad clause is retained for
	 *         compatibility with existing callers
	 */
	public CorpusParser (File corpusFile) throws Throwable {
		// NOTE(review): the factory is used with its default settings, so DOCTYPE
		// declarations are accepted. If corpus files can come from untrusted
		// sources, consider hardening the parser against XXE.
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		DocumentBuilder builder = factory.newDocumentBuilder();

		Document document;
		// try-with-resources: the stream is closed even when parsing fails.
		try (FileInputStream in = new FileInputStream(corpusFile)) {
			document = builder.parse(in);
		}

		Element corpusElem = document.getDocumentElement();
		Element articleElem = firstChildElement(corpusElem, "article", corpusFile);
		Element paraElem = firstChildElement(articleElem, "paragraph", corpusFile);
		sentences = paraElem.getChildNodes();
	}

	/**
	 * Returns the child nodes of the paragraph element selected at construction.
	 *
	 * <p>The list is exactly what {@code getChildNodes()} returned, so it may
	 * contain non-element nodes (such as whitespace text) between elements.
	 *
	 * @return the paragraph's child nodes
	 */
	public NodeList getSentences() {
		return sentences;
	}

	/**
	 * Finds the first child of {@code parent} that is an {@link Element}, skipping
	 * text, comment and other non-element nodes that indented XML places first.
	 *
	 * @param parent the element whose children are scanned
	 * @param role   label for the expected child, used only in the error message
	 * @param source the file being parsed, used only in the error message
	 * @return the first child element, never {@code null}
	 * @throws IllegalArgumentException if {@code parent} has no child element
	 */
	private static Element firstChildElement(Element parent, String role, File source) {
		NodeList children = parent.getChildNodes();
		for (int i = 0; i < children.getLength(); i++) {
			if (children.item(i) instanceof Element) {
				return (Element) children.item(i);
			}
		}
		throw new IllegalArgumentException(
				"No " + role + " element found under <" + parent.getTagName() + "> in " + source);
	}
}
